Enter the directory of the maca folder on your drive and the name of the tissue you want to analyze.
tissue_of_interest = "Liver"
Load the requisite packages and some additional helper functions.
library(here)
## here() starts at /Users/olgabot/code/tabula-muris
library(useful)
## Loading required package: ggplot2
library(Seurat)
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
## Loading required package: Matrix
## Warning: namespace 'Biobase' is not available and has been replaced
## by .GlobalEnv when processing object 'call.'
## Warning: namespace 'lme4' is not available and has been replaced
## by .GlobalEnv when processing object 'call.'
## Warning: namespace 'MatrixModels' is not available and has been replaced
## by .GlobalEnv when processing object 'call.'
## Warning: namespace 'Biobase' is not available and has been replaced
## by .GlobalEnv when processing object 'call.'
## Warning: namespace 'lme4' is not available and has been replaced
## by .GlobalEnv when processing object 'call.'
## Warning: namespace 'MatrixModels' is not available and has been replaced
## by .GlobalEnv when processing object 'call.'
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Matrix)
save_dir = here('00_data_ingest', 'tissue_robj')
# read the metadata to get the plates we want
plate_metadata_filename = here('00_data_ingest', 'facs_raw_data', 'metadata_FACS.csv')
plate_metadata <- read.csv(plate_metadata_filename, sep=",", header = TRUE)
colnames(plate_metadata)[1] <- "plate.barcode"
plate_metadata
## plate.barcode mouse.id tissue subtissue
## 1 D041914 3_8_M Bladder <NA>
## 2 D042253 3_9_M Bladder <NA>
## 3 MAA000487 3_10_M Bladder <NA>
## 4 B000610 3_56_F Bladder <NA>
## 5 B002764 3_38_F Bladder <NA>
## 6 B002771 3_39_F Bladder <NA>
## 7 MAA000538 3_8_M Brain_Neurons Cerebellum
## 8 MAA000550 3_10_M Brain_Neurons Hippocampus
## 9 MAA000553 3_10_M Brain_Neurons Hippocampus
## 10 MAA000560 3_10_M Brain_Neurons Cortex
## 11 MAA000561 3_10_M Brain_Neurons Cortex
## 12 MAA000563 3_10_M Brain_Neurons Hippocampus
## 13 MAA000564 3_10_M Brain_Neurons Striatum
## 14 MAA000570 3_8_M Brain_Microglia Cerebellum
## 15 MAA000571 3_9_M Brain_Microglia Cerebellum
## 16 MAA000572 3_8_M Brain_Microglia Cortex
## 17 MAA000573 3_9_M Brain_Microglia Cortex
## 18 MAA000578 3_10_M Brain_Neurons Cerebellum
## 19 MAA000581 3_10_M Brain_Neurons Cerebellum
## 20 MAA000590 3_9_M Brain_Microglia Striatum
## 21 MAA000591 3_8_M Brain_Microglia Hippocampus
## 22 MAA000592 3_9_M Brain_Microglia Hippocampus
## 23 MAA000593 3_8_M Brain_Microglia Striatum
## 24 MAA000603 3_10_M Brain_Microglia Hippocampus
## 25 MAA000604 3_10_M Brain_Microglia Cortex
## 26 MAA000605 3_10_M Brain_Microglia Cerebellum
## 27 MAA000615 3_10_M Brain_Neurons Striatum
## 28 MAA000617 3_10_M Brain_Microglia Striatum
## 29 MAA000638 3_9_M Brain_Neurons Cerebellum
## 30 MAA000641 3_9_M Brain_Neurons Cerebellum
## 31 MAA000902 3_11_M Brain_Neurons Striatum
## 32 MAA000923 3_9_M Brain_Neurons Hippocampus
## 33 MAA000924 3_8_M Brain_Neurons Cerebellum
## 34 MAA000925 3_9_M Brain_Neurons Striatum
## 35 MAA000926 3_9_M Brain_Neurons Cortex
## 36 MAA000928 3_9_M Brain_Neurons Cerebellum
## 37 MAA000930 3_8_M Brain_Neurons Cortex
## 38 MAA000932 3_11_M Brain_Neurons Hippocampus
## 39 MAA000935 3_8_M Brain_Neurons Hippocampus
## 40 MAA000940 3_8_M Brain_Neurons Striatum
## 41 MAA000941 3_8_M Brain_Neurons Hippocampus
## 42 MAA000942 3_8_M Brain_Neurons Striatum
## 43 MAA000944 3_9_M Brain_Neurons Cortex
## 44 MAA000947 3_9_M Brain_Neurons Hippocampus
## 45 MAA000424 3_11_M Brain_Neurons Cerebellum
## 46 MAA001845 3_39_F Brain_Neurons Cerebellum
## 47 MAA001853 3_38_F Brain_Neurons Cerebellum
## 48 MAA001854 3_38_F Brain_Neurons Cerebellum
## 49 MAA001856 3_39_F Brain_Neurons Cerebellum
## 50 MAA001871 3_39_F Colon Proximal
## 51 MAA001894 3_39_F Brain_Neurons Cortex
## 52 MAA001895 3_38_F Brain_Neurons Cortex
## 53 B000621 3_56_F Brain_Neurons Hippocampus
## 54 B000825 3_38_F Brain_Microglia Striatum
## 55 B000826 3_39_F Brain_Microglia Cortex
## 56 B003274 3_38_F Brain_Neurons Striatum
## 57 B003275 3_38_F Brain_Neurons Hippocampus
## 58 B003277 3_39_F Brain_Neurons Hippocampus
## 59 B003278 3_38_F Brain_Microglia Cortex
## 60 B003279 3_38_F Brain_Microglia Cerebellum
## 61 B003281 3_39_F Brain_Microglia Hippocampus
## 62 B003290 3_38_F Brain_Neurons Striatum
## 63 B003292 3_39_F Brain_Microglia Striatum
## 64 B001688 3_38_F Brain_Neurons Hippocampus
## 65 B001689 3_38_F Brain_Microglia Hippocampus
## 66 B000404 3_56_F Brain_Neurons Cerebellum
## 67 B003728 3_56_F Brain_Neurons Striatum
## 68 B001176 3_56_F Brain_Microglia Striatum
## 69 MAA000583 3_9_M Colon Distal
## 70 MAA000611 3_8_M Colon Proximal
## 71 MAA000612 3_9_M Colon Proximal
## 72 MAA000870 3_10_M Colon Distal
## 73 MAA000871 3_11_M Colon Proximal
## 74 MAA000909 3_10_M Colon Proximal
## 75 MAA000937 3_8_M Colon Distal
## 76 MAA001539 3_56_F Colon Distal
## 77 MAA001632 3_56_F Colon Proximal
## 78 MAA001869 3_38_F Colon Proximal
## 79 MAA001872 3_39_F Colon Proximal
## 80 MAA001873 3_38_F Colon Distal
## 81 MAA001875 3_39_F Colon Distal
## 82 MAA000385 3_10_M Fat SCAT
## 83 MAA000386 3_11_M Fat SCAT
## 84 MAA000388 3_11_M Fat GAT
## 85 MAA000439 3_10_M Fat GAT
## 86 MAA000441 3_10_M Fat BAT
## 87 MAA000442 3_11_M Fat BAT
## 88 MAA000510 3_8_M Fat SCAT
## 89 MAA000554 3_8_M Fat GAT
## 90 MAA000556 3_9_M Fat BAT
## 91 MAA000599 3_8_M Fat BAT
## 92 MAA000873 3_10_M Fat MAT
## 93 MAA000877 3_11_M Fat MAT
## 94 MAA000913 3_9_M Fat GAT
## 95 MAA000914 3_9_M Fat MAT
## 96 MAA000934 3_9_M Fat SCAT
## 97 MAA000531 3_8_M Fat MAT
## 98 D043520 3_39_F Fat MAT
## 99 D043526 3_39_F Fat SCAT
## 100 D045058 3_39_F Fat GAT
## 101 D045064 3_38_F Fat MAT
## 102 D045067 3_38_F Fat GAT
## 103 B000127 3_38_F Fat SCAT
## 104 B001216 3_38_F Fat BAT
## 105 B002707 3_39_F Fat BAT
## 106 B001960 3_56_F Fat BAT
## 107 B002314 3_56_F Fat MAT
## 108 B002954 3_56_F Fat SCAT
## 109 B002955 3_56_F Fat GAT
## 110 MAA000398 3_9_M Heart LA
## 111 MAA000399 3_9_M Heart RV
## 112 MAA000400 3_8_M Heart LA
## 113 MAA000452 3_8_M Heart RV
## 114 MAA000586 3_8_M Heart RA
## 115 MAA000587 3_8_M Heart LV
## 116 MAA000589 3_9_M Heart LV
## 117 MAA000594 3_8_M Heart Aorta
## 118 MAA000595 3_9_M Heart Aorta
## 119 MAA000898 3_11_M Heart RV
## 120 MAA000899 3_10_M Heart RA
## 121 MAA000901 3_10_M Heart RV
## 122 MAA000903 3_11_M Heart RA
## 123 MAA000906 3_11_M Heart Aorta
## 124 MAA000908 3_10_M Heart Aorta
## 125 MAA000917 3_11_M Heart LV
## 126 MAA000918 3_10_M Heart LV
## 127 MAA000919 3_10_M Heart LA
## 128 MAA000920 3_11_M Heart LA
## 129 MAA000936 3_9_M Heart RA
## 130 B000633 3_56_F Heart RV
## 131 B000634 3_56_F Heart LA
## 132 B000636 3_56_F Heart LV
## 133 B002010 3_39_F Heart RA
## 134 B002011 3_38_F Heart RA
## 135 B002421 3_39_F Heart LV
## 136 B002423 3_39_F Heart RV
## 137 B002427 3_39_F Heart LA
## 138 B002428 3_38_F Heart LA
## 139 B002429 3_38_F Heart LV
## 140 B002430 3_38_F Heart Aorta
## 141 B002431 3_39_F Heart Aorta
## 142 B000412 3_56_F Heart RA
## 143 MAA100037 3_10_M/3_11_M Heart ?
## 144 MAA100096 3_38_F/3_39_F Heart ?
## 145 MAA100097 3_38_F/3_39_F Heart LA/RA
## 146 MAA000545 3_8_M Kidney <NA>
## 147 MAA000752 3_10_M Kidney <NA>
## 148 MAA000801 3_11_M Kidney <NA>
## 149 MAA000922 3_9_M Kidney <NA>
## 150 B001717 3_38_F Kidney <NA>
## 151 B002775 3_39_F Kidney <NA>
## 152 MAA000377 3_9_M Liver Non-hepatocytes
## 153 MAA000907 3_11_M Liver Non-hepatocytes
## 154 MAA100039 3_11_M Liver Hepatocytes
## 155 MAA100040 3_11_M Liver Hepatocytes
## 156 MAA100138 3_56_F Liver Hepatocytes
## 157 MAA100140 3_57_F Liver Hepatocytes
## 158 MAA100041 3_9_M Liver Hepatocytes
## 159 MAA100042 3_9_M Liver Hepatocytes
## 160 MAA000526 3_9_M Lung <NA>
## 161 MAA000530 3_8_M Lung <NA>
## 162 MAA000839 3_11_M Lung <NA>
## 163 MAA000891 3_10_M Lung <NA>
## 164 MAA001847 3_39_F Lung EPCAM
## 165 MAA001889 3_38_F Lung EPCAM
## 166 MAA001892 3_38_F Lung Endomucin
## 167 D043522 3_39_F Lung Endomucin
## 168 B002432 3_38_F Mammary Mammary_Gland
## 169 B002433 3_38_F Mammary Mammary_Gland
## 170 B002435 3_39_F Mammary Mammary_Gland
## 171 B002436 3_39_F Mammary Mammary_Gland
## 172 B002437 3_39_F Mammary Mammary_Gland
## 173 B002438 3_38_F Mammary Mammary_Gland
## 174 B000166 3_56_F Mammary Mammary_Gland
## 175 B000167 3_56_F Mammary Mammary_Gland
## 176 B000168 3_57_F Mammary Mammary_Gland
## 177 B002309 3_57_F Mammary Mammary_Gland
## 178 D042044 3_9_M Marrow B-cells
## 179 D042467 3_9_M Marrow T-cells
## 180 D041912 3_8_M Marrow Granulocytes
## 181 D042193 3_8_M Marrow T-cells
## 182 D042479 3_8_M Marrow B-cells
## 183 MAA000409 3_10_M Marrow Granulocytes
## 184 MAA000596 3_9_M Marrow KLS
## 185 MAA000600 3_8_M Marrow KLS
## 186 MAA000639 3_9_M Marrow Granulocytes
## 187 MAA000652 3_10_M Marrow B-cells
## 188 MAA000844 3_10_M Marrow T-cells
## 189 MAA000848 3_10_M Marrow KLS
## 190 D045139 3_39_F Marrow Granulocytes
## 191 D045140 3_38_F Marrow Granulocytes
## 192 MAA001842 3_38_F Marrow T-cells
## 193 MAA001844 3_38_F Marrow KLS
## 194 MAA001883 3_39_F Marrow B-cells
## 195 MAA001884 3_38_F Marrow B-cells
## 196 MAA001887 3_39_F Marrow KLS
## 197 MAA001888 3_39_F Marrow T-cells
## 198 D042103 3_11_M Muscle ForelimbandHindlimb
## 199 D042105 3_11_M Muscle Diaphragm
## 200 D042180 3_10_M Muscle Diaphragm
## 201 D042185 3_9_M Muscle ForelimbandHindlimb
## 202 D042186 3_8_M Muscle ForelimbandHindlimb
## 203 MAA001454 3_38_F Muscle Diaphragm
## 204 B002762 3_39_F Muscle Diaphragm
## 205 B002765 3_38_F Muscle ForelimbandHindlimb
## 206 B002769 3_39_F Muscle ForelimbandHindlimb
## 207 D042473 3_10_M Muscle ForelimbandHindlimb
## 208 MAA000574 3_8_M Pancreas Exocrine
## 209 MAA000577 3_8_M Pancreas Endocrine
## 210 MAA000884 3_10_M Pancreas Endocrine
## 211 MAA000910 3_10_M Pancreas Exocrine
## 212 MAA001857 3_38_F Pancreas Endocrine
## 213 MAA001861 3_39_F Pancreas Exocrine
## 214 MAA001862 3_39_F Pancreas Endocrine
## 215 MAA001868 3_38_F Pancreas Exocrine
## 216 MAA000427 3_11_M Skin Anagen
## 217 MAA000435 3_10_M Skin Anagen
## 218 MAA000549 3_8_M Skin Anagen
## 219 MAA000597 3_9_M Skin Anagen
## 220 MAA000614 3_10_M Skin <NA>
## 221 MAA000927 3_9_M Skin <NA>
## 222 MAA000938 3_8_M Skin <NA>
## 223 B003283 3_38_F Skin <NA>
## 224 B000126 3_39_F Skin <NA>
## 225 MAA000508 3_9_M Spleen <NA>
## 226 MAA000559 3_8_M Spleen <NA>
## 227 MAA000776 3_10_M Spleen <NA>
## 228 MAA000779 3_11_M Spleen <NA>
## 229 B000971 3_39_F Spleen <NA>
## 230 B001750 3_38_F Spleen <NA>
## 231 MAA000607 3_9_M Thymus <NA>
## 232 MAA000609 3_8_M Thymus <NA>
## 233 MAA000880 3_11_M Thymus <NA>
## 234 B000827 3_38_F Thymus Flowthrough
## 235 B003284 3_39_F Thymus Epithelium
## 236 B003285 3_38_F Thymus Epithelium
## 237 B003291 3_39_F Thymus Flowthrough
## 238 D041894 3_9_M Tongue <NA>
## 239 D041904 3_10_M Tongue <NA>
## 240 D041911 3_8_M Tongue <NA>
## 241 B002777 3_39_F Tongue <NA>
## 242 B002780 3_38_F Tongue <NA>
## 243 D042474 3_8_M Trachea <NA>
## 244 D042475 3_9_M Trachea <NA>
## 245 MAA000496 3_10_M Trachea <NA>
## 246 MAA001865 3_38_F Trachea <NA>
## 247 MAA001867 3_39_F Trachea <NA>
## FACS.selection mouse.sex
## 1 Multiple M
## 2 Multiple M
## 3 Multiple M
## 4 Multiple F
## 5 Multiple F
## 6 Multiple F
## 7 Neurons M
## 8 Neurons M
## 9 <NA> M
## 10 Neurons M
## 11 Neurons M
## 12 Neurons M
## 13 Neurons M
## 14 Microglia M
## 15 Microglia M
## 16 Microglia M
## 17 Microglia M
## 18 Neurons M
## 19 Neurons M
## 20 Microglia M
## 21 Microglia M
## 22 Microglia M
## 23 Microglia M
## 24 Microglia M
## 25 Microglia M
## 26 Microglia M
## 27 Neurons M
## 28 Microglia M
## 29 Neurons M
## 30 Multiple M
## 31 Neurons M
## 32 Neurons M
## 33 Neurons M
## 34 Neurons M
## 35 Neurons M
## 36 Neurons M
## 37 Neurons M
## 38 Neurons M
## 39 Neurons M
## 40 Neurons M
## 41 Neurons M
## 42 Neurons M
## 43 Neurons M
## 44 Neurons M
## 45 Neurons M
## 46 Neurons F
## 47 Neurons F
## 48 Neurons F
## 49 Neurons F
## 50 Multiple F
## 51 Neurons F
## 52 Neurons F
## 53 Neurons F
## 54 Microglia F
## 55 Microglia F
## 56 Neurons F
## 57 Neurons F
## 58 Neurons F
## 59 Microglia F
## 60 Microglia F
## 61 Microglia F
## 62 Neurons F
## 63 Microglia F
## 64 Neurons F
## 65 Microglia F
## 66 Neurons F
## 67 Neurons F
## 68 Microglia F
## 69 Multiple M
## 70 Multiple M
## 71 Multiple M
## 72 Multiple M
## 73 Multiple M
## 74 Multiple M
## 75 Multiple M
## 76 Multiple F
## 77 Multiple F
## 78 Multiple F
## 79 Multiple F
## 80 Multiple F
## 81 Multiple F
## 82 Multiple M
## 83 Multiple M
## 84 Multiple M
## 85 Multiple M
## 86 Multiple M
## 87 Multiple M
## 88 Multiple M
## 89 Multiple M
## 90 Multiple M
## 91 Multiple M
## 92 Multiple M
## 93 Multiple M
## 94 Multiple M
## 95 Multiple M
## 96 Multiple M
## 97 Multiple M
## 98 Multiple F
## 99 Multiple F
## 100 Multiple F
## 101 Multiple F
## 102 Multiple F
## 103 Multiple F
## 104 Multiple F
## 105 Multiple F
## 106 Multiple F
## 107 Multiple F
## 108 Multiple F
## 109 Multiple F
## 110 Viable M
## 111 Viable M
## 112 Viable M
## 113 Viable M
## 114 Viable M
## 115 Viable M
## 116 Viable M
## 117 Viable M
## 118 Viable M
## 119 Viable M
## 120 Viable M
## 121 Viable M
## 122 Viable M
## 123 Viable M
## 124 Viable M
## 125 Viable M
## 126 Viable M
## 127 Viable M
## 128 Viable M
## 129 Viable M
## 130 Viable F
## 131 Viable F
## 132 Viable F
## 133 Viable F
## 134 Viable F
## 135 Viable F
## 136 Viable F
## 137 Viable F
## 138 Viable F
## 139 Viable F
## 140 Viable F
## 141 Viable F
## 142 Viable F
## 143 cardiomyocytes M
## 144 cardiomyocytes F
## 145 cardiomyocytes F
## 146 Viable M
## 147 Viable M
## 148 Viable M
## 149 Viable M
## 150 Viable F
## 151 Viable F
## 152 Viable M
## 153 Viable M
## 154 <NA> M
## 155 <NA> M
## 156 <NA> F
## 157 <NA> F
## 158 <NA> M
## 159 <NA> M
## 160 Multiple M
## 161 Multiple M
## 162 Multiple M
## 163 Multiple M
## 164 Multiple F
## 165 Multiple F
## 166 Multiple F
## 167 Multiple F
## 168 Multiple F
## 169 Multiple F
## 170 Multiple F
## 171 Multiple F
## 172 Multiple F
## 173 Multiple F
## 174 Multiple F
## 175 Multiple F
## 176 Multiple F
## 177 Multiple F
## 178 Multiple M
## 179 Multiple M
## 180 Multiple M
## 181 Multiple M
## 182 Multiple M
## 183 Multiple M
## 184 Multiple M
## 185 Multiple M
## 186 Multiple M
## 187 Multiple M
## 188 Multiple M
## 189 Multiple M
## 190 Multiple F
## 191 Multiple F
## 192 Multiple F
## 193 Multiple F
## 194 Multiple F
## 195 Multiple F
## 196 Multiple F
## 197 Multiple F
## 198 Multiple M
## 199 Multiple M
## 200 Multiple M
## 201 Multiple M
## 202 Multiple M
## 203 CD31+; CD45+; CD31-CD45-Sca1+; CD31-CD45-Sca1-VCAM1+ F
## 204 CD31+; CD45+; CD31-CD45-Sca1+; CD31-CD45-Sca1-VCAM1+ F
## 205 CD31+; CD45+; CD31-CD45-Sca1+; CD31-CD45-Sca1-VCAM1+ F
## 206 CD31+; CD45+; CD31-CD45-Sca1+; CD31-CD45-Sca1-VCAM1+ F
## 207 Multiple M
## 208 Viable M
## 209 Viable M
## 210 Viable M
## 211 Viable M
## 212 Viable F
## 213 Viable F
## 214 Viable F
## 215 Viable F
## 216 Multiple M
## 217 Multiple M
## 218 Multiple M
## 219 Multiple M
## 220 Multiple M
## 221 Multiple M
## 222 Multiple M
## 223 Multiple F
## 224 Multiple F
## 225 Viable M
## 226 Viable M
## 227 Viable M
## 228 Viable M
## 229 Viable F
## 230 Viable F
## 231 Multiple M
## 232 Multiple M
## 233 Multiple M
## 234 Multiple F
## 235 Multiple F
## 236 Multiple F
## 237 Multiple F
## 238 Multiple M
## 239 Multiple M
## 240 Multiple M
## 241 Multiple F
## 242 Multiple F
## 243 Multiple M
## 244 Multiple M
## 245 Multiple M
## 246 Multiple F
## 247 Multiple F
Subset the metadata on the tissue.
tissue_plates = filter(plate_metadata, tissue == tissue_of_interest)[,c('plate.barcode','tissue','subtissue','mouse.sex')]
tissue_plates
## plate.barcode tissue subtissue mouse.sex
## 1 MAA000377 Liver Non-hepatocytes M
## 2 MAA000907 Liver Non-hepatocytes M
## 3 MAA100039 Liver Hepatocytes M
## 4 MAA100040 Liver Hepatocytes M
## 5 MAA100138 Liver Hepatocytes F
## 6 MAA100140 Liver Hepatocytes F
## 7 MAA100041 Liver Hepatocytes M
## 8 MAA100042 Liver Hepatocytes M
Load the read count data.
#Load the gene names and set the metadata columns by opening the first file
filename = here('00_data_ingest', 'facs_raw_data', 'FACS', paste0(tissue_of_interest, '-counts.csv'))
raw.data = read.csv(filename, sep=",", row.names=1)
# raw.data = data.frame(row.names = rownames(raw.data))
corner(raw.data)
## F18.MAA000377.3_9_M.1.1 J20.MAA000377.3_9_M.1.1
## 0610005C13Rik 0 0
## 0610007C21Rik 0 0
## 0610007L01Rik 23 0
## 0610007N19Rik 0 0
## 0610007P08Rik 0 0
## F19.MAA000377.3_9_M.1.1 J21.MAA000377.3_9_M.1.1
## 0610005C13Rik 0 0
## 0610007C21Rik 0 0
## 0610007L01Rik 0 0
## 0610007N19Rik 0 0
## 0610007P08Rik 0 0
## F20.MAA000377.3_9_M.1.1
## 0610005C13Rik 0
## 0610007C21Rik 2
## 0610007L01Rik 0
## 0610007N19Rik 0
## 0610007P08Rik 0
Make a vector of plate barcodes for each cell
plate.barcodes = lapply(colnames(raw.data), function(x) strsplit(strsplit(x, "_")[[1]][1], '.', fixed=TRUE)[[1]][2])
head(plate.barcodes)
## [[1]]
## [1] "MAA000377"
##
## [[2]]
## [1] "MAA000377"
##
## [[3]]
## [1] "MAA000377"
##
## [[4]]
## [1] "MAA000377"
##
## [[5]]
## [1] "MAA000377"
##
## [[6]]
## [1] "MAA000377"
Use only the metadata rows corresponding to Bladder plates. Make a plate barcode dataframe to “expand” the per-plate metadata to be per-cell.
barcode.df = t.data.frame(as.data.frame(plate.barcodes))
rownames(barcode.df) = colnames(raw.data)
colnames(barcode.df) = c('plate.barcode')
head(barcode.df)
## plate.barcode
## F18.MAA000377.3_9_M.1.1 "MAA000377"
## J20.MAA000377.3_9_M.1.1 "MAA000377"
## F19.MAA000377.3_9_M.1.1 "MAA000377"
## J21.MAA000377.3_9_M.1.1 "MAA000377"
## F20.MAA000377.3_9_M.1.1 "MAA000377"
## J22.MAA000377.3_9_M.1.1 "MAA000377"
rnames = row.names(barcode.df)
meta.data <- merge(barcode.df, plate_metadata, by='plate.barcode', sort = F)
row.names(meta.data) <- rnames
# Sort cells by plate barcode because that's how the data was originally
meta.data = meta.data[order(meta.data$plate.barcode), ]
corner(meta.data)
## plate.barcode mouse.id tissue subtissue
## F18.MAA000377.3_9_M.1.1 MAA000377 3_9_M Liver Non-hepatocytes
## J20.MAA000377.3_9_M.1.1 MAA000377 3_9_M Liver Non-hepatocytes
## F19.MAA000377.3_9_M.1.1 MAA000377 3_9_M Liver Non-hepatocytes
## J21.MAA000377.3_9_M.1.1 MAA000377 3_9_M Liver Non-hepatocytes
## F20.MAA000377.3_9_M.1.1 MAA000377 3_9_M Liver Non-hepatocytes
## FACS.selection
## F18.MAA000377.3_9_M.1.1 Viable
## J20.MAA000377.3_9_M.1.1 Viable
## F19.MAA000377.3_9_M.1.1 Viable
## J21.MAA000377.3_9_M.1.1 Viable
## F20.MAA000377.3_9_M.1.1 Viable
raw.data = raw.data[, rownames(meta.data)]
corner(raw.data)
## F18.MAA000377.3_9_M.1.1 J20.MAA000377.3_9_M.1.1
## 0610005C13Rik 0 0
## 0610007C21Rik 0 0
## 0610007L01Rik 23 0
## 0610007N19Rik 0 0
## 0610007P08Rik 0 0
## F19.MAA000377.3_9_M.1.1 J21.MAA000377.3_9_M.1.1
## 0610005C13Rik 0 0
## 0610007C21Rik 0 0
## 0610007L01Rik 0 0
## 0610007N19Rik 0 0
## 0610007P08Rik 0 0
## F20.MAA000377.3_9_M.1.1
## 0610005C13Rik 0
## 0610007C21Rik 2
## 0610007L01Rik 0
## 0610007N19Rik 0
## 0610007P08Rik 0
Process the raw data and load it into the Seurat object.
# Find ERCC's, compute the percent ERCC, and drop them from the raw data.
erccs <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = TRUE)
percent.ercc <- Matrix::colSums(raw.data[erccs, ])/Matrix::colSums(raw.data)
ercc.index <- grep(pattern = "^ERCC-", x = rownames(x = raw.data), value = FALSE)
raw.data <- raw.data[-ercc.index,]
# Create the Seurat object with all the data
tiss <- CreateSeuratObject(raw.data = raw.data, project = tissue_of_interest,
min.cells = 5, min.genes = 5)
tiss <- AddMetaData(object = tiss, meta.data)
tiss <- AddMetaData(object = tiss, percent.ercc, col.name = "percent.ercc")
# Change default name for sums of counts from nUMI to nReads
colnames(tiss@meta.data)[colnames(tiss@meta.data) == 'nUMI'] <- 'nReads'
# Create metadata columns for annotations and subannotations
tiss@meta.data[,'annotation'] <- NA
tiss@meta.data[,'subannotation'] <- NA
Calculate percent ribosomal genes.
ribo.genes <- grep(pattern = "^Rp[sl][[:digit:]]", x = rownames(x = tiss@data), value = TRUE)
percent.ribo <- Matrix::colSums(tiss@raw.data[ribo.genes, ])/Matrix::colSums(tiss@raw.data)
tiss <- AddMetaData(object = tiss, metadata = percent.ribo, col.name = "percent.ribo")
A sanity check: genes per cell vs reads per cell.
### !clusters of hep. with nGene 6000 - 8000, not present in 10x run from same mice
GenePlot(object = tiss, gene1 = "nReads", gene2 = "nGene", use.raw=T)
Filter out cells with few reads and few genes.
tiss <- FilterCells(object = tiss, subset.names = c("nGene", "nReads"),
low.thresholds = c(500, 50000), high.thresholds = c(25000, 5000000))
Normalize the data, then regress out correlation with total reads
tiss <- NormalizeData(object = tiss)
tiss <- ScaleData(object = tiss, vars.to.regress = c("nReads", "percent.ribo","Rn45s"))
## [1] "Regressing out nReads" "Regressing out percent.ribo"
## [3] "Regressing out Rn45s"
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
## [1] "Scaling data matrix"
##
|
| | 0%
|
|=================================================================| 100%
tiss <- FindVariableGenes(object = tiss, do.plot = TRUE, x.high.cutoff = Inf, y.cutoff = 0.5)
Run Principal Component Analysis.
tiss <- RunPCA(object = tiss, do.print = FALSE)
tiss <- ProjectPCA(object = tiss, do.print = FALSE)
Later on (in FindClusters and TSNE) you will pick a number of principal components to use. This has the effect of keeping the major directions of variation in the data and, ideally, supressing noise. There is no correct answer to the number to use, but a decent rule of thumb is to go until the plot plateaus.
PCElbowPlot(object = tiss)
Choose the number of principal components to use.
# Set number of principal components.
n.pcs = 10
The clustering is performed based on a nearest neighbors graph. Cells that have similar expression will be joined together. The Louvain algorithm looks for groups of cells with high modularity–more connections within the group than between groups. The resolution parameter determines the scale…higher resolution will give more clusters, lower resolution will give fewer.
For the top-level clustering, aim to under-cluster instead of over-cluster. It will be easy to subset groups and further analyze them below.
# Set resolution
res.used <- 0.3
tiss <- FindClusters(object = tiss, reduction.type = "pca", dims.use = 1:n.pcs,
resolution = res.used, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
To visualize
# If cells are too spread out, you can raise the perplexity. If you have few cells, try a lower perplexity (but never less than 10).
tiss <- RunTSNE(object = tiss, dims.use = 1:n.pcs, seed.use = 10, perplexity=15)
# note that you can set do.label=T to help label individual clusters
TSNEPlot(object = tiss, do.label = T)
tiss = BuildClusterTree(tiss)
## [1] "Finished averaging RNA for cluster 0"
## [1] "Finished averaging RNA for cluster 1"
## [1] "Finished averaging RNA for cluster 2"
## [1] "Finished averaging RNA for cluster 3"
## [1] "Finished averaging RNA for cluster 4"
## [1] "Finished averaging RNA for cluster 5"
table(tiss@ident)
##
## 0 1 2 3 4 5
## 296 196 103 51 35 29
Check expression of genes of interset.
Dotplots let you see the intensity of exppression and the fraction of cells expressing for each of your genes of interest.
VlnPlot(object = tiss, features.plot = c("Klrb1a", "Klrb1c", "Cd4", "Cd8a"), use.raw = TRUE, nCol = 2)
GenePlot(tiss, 'Cxcr6', 'Nkg7', use.raw = T)
GenePlot(tiss, 'Cxcr6', 'Cd8a', use.raw = T)
GenePlot(tiss, 'Cyp2e1', 'Cyp2f2', use.raw = T)
How big are the clusters?
table(tiss@ident)
##
## 0 1 2 3 4 5
## 296 196 103 51 35 29
Which markers identify a specific cluster?
#clust.markers <- FindMarkers(object = tiss, ident.1 = 6, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
#print(x = head(x= clust.markers, n = 20))
clust.markers <- FindMarkers(object = tiss, ident.1 = 4, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
print(x = head(x= clust.markers, n = 20))
## p_val avg_diff pct.1 pct.2
## Zap70 1.482652e-66 1.8059677 0.943 0.308
## Ms4a4b 6.730391e-56 2.8092692 0.971 0.030
## Nkg7 1.709649e-54 3.4867188 0.914 0.025
## Selplg 1.064103e-53 2.3380461 1.000 0.431
## Il2rb 1.458926e-53 2.6275301 0.971 0.010
## Lck 5.829937e-52 2.7934577 1.000 0.071
## Cd3g 2.954488e-51 2.3884440 0.886 0.024
## Ccl5 2.568174e-49 2.9250136 0.943 0.037
## Skap1 9.844417e-49 2.0781412 0.971 0.024
## Ctsw 3.158630e-47 2.4765132 0.914 0.018
## Cxcr6 1.727933e-46 3.4080946 0.829 0.006
## Coro1a 4.285152e-46 2.4679537 1.000 0.271
## Tmsb10 5.952440e-45 0.8141295 1.000 0.261
## Gimap3 1.042351e-44 2.4229643 1.000 0.058
## Cd3d 5.170914e-44 1.8587218 0.886 0.027
## B4galnt1 9.517737e-44 1.9079277 0.943 0.385
## Arsb 7.171493e-43 1.0421826 0.543 0.299
## Cd3e 2.377490e-42 0.7760406 0.886 0.012
## Id2 4.534231e-42 1.8714379 0.971 0.721
## Abcb1a 9.804462e-42 0.5947887 0.600 0.043
You can also compute all markers for all clusters at once. This may take some time.
tiss.markers <- FindAllMarkers(object = tiss, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
Display the top markers you computed above.
top_genes <- tiss.markers %>% group_by(cluster) %>% top_n(50, avg_diff)
print(top_genes)
## # A tibble: 300 x 6
## # Groups: cluster [6]
## p_val avg_diff pct.1 pct.2 cluster gene
## <dbl> <dbl> <dbl> <dbl> <fctr> <chr>
## 1 1.716084e-106 1.295394 0.956 0.459 0 Aldob
## 2 5.020611e-101 1.466199 0.973 0.529 0 Fabp1
## 3 6.195928e-100 1.791094 0.976 0.754 0 Mup3
## 4 1.406955e-96 1.302912 0.986 0.700 0 Apoa2
## 5 9.543387e-95 1.402257 0.976 0.635 0 Apoc1
## 6 2.036623e-89 1.944940 0.932 0.780 0 Gstp1
## 7 2.122724e-89 1.104824 0.946 0.425 0 Adh1
## 8 1.670452e-88 1.521682 0.956 0.771 0 Scp2
## 9 4.986071e-88 2.698246 0.659 0.592 0 Saa1
## 10 2.866670e-85 1.581322 0.949 0.437 0 Bhmt
## # ... with 290 more rows
# find all markers distinguishing FEMALE vs MALE
#cluster_sex.markers <- FindMarkers(object = tiss, ident.1 = c(0), ident.2 = c(2,5),
# min.pct = 0.25)
#print(x = head(x = cluster_sex.markers, n = 50))
#write.csv(x = head(x = cluster_sex.markers, n = 20), "clus-marker-SEX.csv")
At a coarse level, we can use canonical markers to match the unbiased clustering to known cell types:
# stash current cluster IDs
tiss <- StashIdent(object = tiss, save.name = "cluster.ids")
# enumerate current cluster IDs and the labels for them
cluster.ids <- c(0, 1, 2, 3, 4, 5)
annotation <- c("hepatocyte", "endothelial cell of hepatic sinusoid", "hepatocyte", "Kupffer cell", "natural killer cell",
"B cell")
cell_ontology_id <- c("CL:0000182", "CL:1000398", "CL:0000182", "CL:0000091", "CL:0000623",
"CL:0000236")
tiss@meta.data[,'annotation'] <- plyr::mapvalues(x = tiss@ident, from = cluster.ids, to = annotation)
tiss@meta.data[,'cell_ontology_id'] <- plyr::mapvalues(x = tiss@ident, from = cluster.ids, to = cell_ontology_id)
tiss@meta.data[tiss@cell.names,'annotation'] <- as.character(tiss@meta.data$annotation)
tiss@meta.data[tiss@cell.names,'cell_ontology_id'] <- as.character(tiss@meta.data$cell_ontology_id)
TSNEPlot(object = tiss, do.label = TRUE, pt.size = 1, group.by='annotation')
Color by metadata, like plate barcode, to check for batch effects.
TSNEPlot(object = tiss, do.return = TRUE, group.by = "plate.barcode")
TSNEPlot(object = tiss, do.return = TRUE, group.by = "mouse.sex")
Print a table showing the count of cells in each identity category from each plate.
table(as.character(tiss@ident), as.character(tiss@meta.data$plate.barcode))
##
## MAA000377 MAA000907 MAA100039 MAA100040 MAA100041 MAA100042 MAA100138
## 0 29 52 65 36 21 25 34
## 1 21 162 0 0 0 13 0
## 2 5 4 0 0 1 0 48
## 3 12 39 0 0 0 0 0
## 4 7 28 0 0 0 0 0
## 5 6 23 0 0 0 0 0
##
## MAA100140
## 0 34
## 1 0
## 2 45
## 3 0
## 4 0
## 5 0
When you save the annotated tissue, please give it a name.
filename = here('00_data_ingest', 'tissue_seurat_robj',
paste0(tissue_of_interest, "_seurat_tiss.Robj"))
print(filename)
## [1] "/Users/olgabot/code/tabula-muris/00_data_ingest/tissue_seurat_robj/Liver_seurat_tiss.Robj"
save(tiss, file=filename)
# To reload a saved object
# filename = here('00_data_ingest', 'tissue_seurat_robj',
# paste0(tissue_of_interest, "_seurat_tiss.Robj"))
# load(file=filename)
So that Biohub can easily combine all your annotations, please export them as a simple csv.
head(tiss@meta.data)
## nGene nReads orig.ident plate.barcode mouse.id
## F18.MAA000377.3_9_M.1.1 2349 341976 Liver MAA000377 3_9_M
## J20.MAA000377.3_9_M.1.1 2655 358887 Liver MAA000377 3_9_M
## F19.MAA000377.3_9_M.1.1 1605 378745 Liver MAA000377 3_9_M
## J21.MAA000377.3_9_M.1.1 1538 169724 Liver MAA000377 3_9_M
## F20.MAA000377.3_9_M.1.1 2625 371694 Liver MAA000377 3_9_M
## J22.MAA000377.3_9_M.1.1 1720 152241 Liver MAA000377 3_9_M
## tissue subtissue FACS.selection mouse.sex
## F18.MAA000377.3_9_M.1.1 Liver Non-hepatocytes Viable M
## J20.MAA000377.3_9_M.1.1 Liver Non-hepatocytes Viable M
## F19.MAA000377.3_9_M.1.1 Liver Non-hepatocytes Viable M
## J21.MAA000377.3_9_M.1.1 Liver Non-hepatocytes Viable M
## F20.MAA000377.3_9_M.1.1 Liver Non-hepatocytes Viable M
## J22.MAA000377.3_9_M.1.1 Liver Non-hepatocytes Viable M
## percent.ercc annotation
## F18.MAA000377.3_9_M.1.1 0.03546739 natural killer cell
## J20.MAA000377.3_9_M.1.1 0.03315256 natural killer cell
## F19.MAA000377.3_9_M.1.1 0.03014714 hepatocyte
## J21.MAA000377.3_9_M.1.1 0.07767217 Kupffer cell
## F20.MAA000377.3_9_M.1.1 0.03400151 endothelial cell of hepatic sinusoid
## J22.MAA000377.3_9_M.1.1 0.06422069 Kupffer cell
## subannotation percent.ribo res.0.3 cluster.ids
## F18.MAA000377.3_9_M.1.1 NA 0.029622473 4 4
## J20.MAA000377.3_9_M.1.1 NA 0.024196700 4 4
## F19.MAA000377.3_9_M.1.1 NA 0.007964172 0 0
## J21.MAA000377.3_9_M.1.1 NA 0.023341161 3 3
## F20.MAA000377.3_9_M.1.1 NA 0.010819902 1 1
## J22.MAA000377.3_9_M.1.1 NA 0.020444509 3 3
## cell_ontology_id
## F18.MAA000377.3_9_M.1.1 CL:0000623
## J20.MAA000377.3_9_M.1.1 CL:0000623
## F19.MAA000377.3_9_M.1.1 CL:0000182
## J21.MAA000377.3_9_M.1.1 CL:0000091
## F20.MAA000377.3_9_M.1.1 CL:1000398
## J22.MAA000377.3_9_M.1.1 CL:0000091
filename = here('00_data_ingest', 'tissue_annotation_csv',
paste0(tissue_of_interest, "_annotation.csv"))
write.csv(tiss@meta.data[,c('plate.barcode','annotation','cell_ontology_id')], file=filename)